# Load the train and test sets with fread's progress output switched off.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
tr <- fread("train.csv", header = TRUE, showProgress = FALSE)
te <- fread("test.csv", header = TRUE, showProgress = FALSE)
# Keep the label in its own vector and remove it from the training table.
target <- tr$target
tr$target <- NULL
# Class counts of the label (captured output below).
summary(as.factor(target))
##      0      1 
## 179902  20098
# Remove the ID_code column from both tables.
tr$ID_code <- NULL
te$ID_code <- NULL
# Structure plots for both tables.
plot_str(tr)
plot_str(te)

Only continuous columns.

# Overview of the training table. The captured output below reports
# 200000 rows, 200 continuous columns and 0 missing values.
introduce(tr)
##      rows columns discrete_columns continuous_columns all_missing_columns
## 1: 200000     200                0                200                   0
##    total_missing_values complete_rows total_observations memory_usage
## 1:                    0        200000           40000000    320037768
# Same overview for the test table; the captured output is identical.
introduce(te)
##      rows columns discrete_columns continuous_columns all_missing_columns
## 1: 200000     200                0                200                   0
##    total_missing_values complete_rows total_observations memory_usage
## 1:                    0        200000           40000000    320037768
# Missing-value plot for the training table.
plot_missing(tr)

No missing values in the training set.

# Missing-value plot for the test table.
plot_missing(te)

No missing values in the test set.

# Pairwise correlations of the training columns, drawn as the lower triangle
# only and without the diagonal.
corrplot(cor(tr, use = "complete.obs"), type = "lower", diag = FALSE)

# Same plot for the test columns.
corrplot(cor(te, use = "complete.obs"), type = "lower", diag = FALSE)

No correlation (normalized data?)

# Re-read the training file into a separate table that still contains the
# `target` column. TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
train <- fread("train.csv", header = TRUE, showProgress = FALSE)
# Column positions of the first batch of 20 features; the later batches reuse
# this vector with an offset.
feature_groups <- 3:22
# Column 2 plus the 20 feature columns. Column 2 is expected to be `target`,
# since gather() below excludes a column of that name.
col_names <- colnames(train)[c(2, feature_groups)]
# Long format: every column except `target` is stacked into
# features/value pairs.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var0, 1, 2, 6, 9, 12, 13, 14, 18

# Second batch: column 2 plus the feature positions shifted by 20
# (columns 23:42 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 20)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var21, 22, 26, 33, 34, 35, 36

# Third batch: column 2 plus the feature positions shifted by 40
# (columns 43:62 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 40)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var40, 41, 44, 48, 52, 53, 55

# Fourth batch: column 2 plus the feature positions shifted by 60
# (columns 63:82 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 60)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var66, 67, 71, 75, 76, 78

# Fifth batch: column 2 plus the feature positions shifted by 80
# (columns 83:102 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 80)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var80, 81, 86, 92, 93, 94, 95, 99

# Sixth batch: column 2 plus the feature positions shifted by 100
# (columns 103:122 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 100)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var108, 109, 110, 115, 116, 118, 119

# Seventh batch: column 2 plus the feature positions shifted by 120
# (columns 123:142 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 120)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var121, 122, 123, 125, 127, 130, 131, 132, 133, 135, 137, 139

# Eighth batch: column 2 plus the feature positions shifted by 140
# (columns 143:162 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 140)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var141, 146, 147, 148, 149, 154, 157

# Ninth batch: column 2 plus the feature positions shifted by 160
# (columns 163:182 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 160)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var163, 164, 165, 166, 169, 170, 172, 173, 174, 177, 179

# Tenth batch: column 2 plus the feature positions shifted by 180
# (columns 183:202 when feature_groups is 3:22).
col_names <- colnames(train)[c(2, feature_groups + 180)]
# Long format: every column except `target` is stacked into
# features/value pairs. FALSE is spelled out because F can be reassigned.
temp <- gather(
  train[, col_names, with = FALSE],
  key = "features", value = "value", -target
)
temp$target <- factor(temp$target)
# Explicit levels so the facets follow the column order of `col_names`.
temp$features <- factor(
  temp$features,
  levels = col_names[-1], labels = col_names[-1]
)
# One density panel per feature, split by target class.
ggplot(data = temp, aes(x = value)) +
  geom_density(aes(fill = target, color = target), alpha = 0.3) +
  scale_color_manual(values = c("1" = "dodgerblue", "0" = "firebrick1")) +
  theme_tufte() +
  facet_wrap(~ features, ncol = 4, scales = "free")

var180, 184, 188, 190, 191, 192, 198